library(knitr)
library(ggplot2)
library(stringr)
suppressMessages(library(dplyr))
library(sp)
suppressMessages(library(rgdal))
suppressMessages(library(dismo))
suppressMessages(library(stargazer))
library(leaflet)
library(XML)
suppressMessages(library(maptools))
library(automap)
suppressMessages(library(RStata))
suppressMessages(library(fields))
library(gstat)
library(htmltools)
suppressMessages(library(Matching))
wd <- "/Users/mlowes/drive/optimized_agronomy/soil/soil_health_study/data/ke baseline"
dd <- paste(wd, "data", sep = "/")
od <- paste(wd, 'output', sep = "/")

load(paste("/Users/mlowes/drive/R_help/3_analyze/regressions", "regression_functions.Rdata", sep = "/"))
d <- read.csv(paste(dd, "KE SHS Baseline Survey Data.csv", sep = '/'))
soil <- read.csv(paste(dd, "Kenya_SHS_results.csv", sep = "/"))
cnP <- read.csv(paste(dd, "Kenya_SHS_N_C.csv", sep = "/"))
Identifiers <- read.csv(paste(dd, "Combined meta with SSN.csv", sep = '/'), stringsAsFactors = F)

First step is merging the data. The ids in the Identifiers data need to be adjusted so that they match the ids in the ids in CommCare

Identifiers$oaf.ID <- tolower(Identifiers$oaf.ID)
Identifiers$DISTRICT[Identifiers$DISTRICT=="Kakamega North"] <- "Kakamega B (North)"
Identifiers$DISTRICT[Identifiers$DISTRICT=="Kakamega South"] <- "Kakamega (South)"

d$id.DistrictName <- as.character(d$id.DistrictName)
d$id.DistrictName[d$id.DistrictName=="GemLundha"] <- "Gem"
d$id.DistrictName[d$id.DistrictName=="Lug Ari"] <- "Lugari"

d$id.Soil_Sample_Id <- tolower(d$id.Soil_Sample_Id)

Identifiers$sample_id <- ifelse(grepl("c", Identifiers$oaf.ID), Identifiers$oaf.ID, paste(tolower(Identifiers$DISTRICT), Identifiers$oaf.ID, sep = ""))

Identifiers$sample_id <- gsub(" ", "", Identifiers$sample_id)
d$id.Soil_Sample_Id <- gsub(" ", "", d$id.Soil_Sample_Id)
table(d$id.Soil_Sample_Id %in% Identifiers$sample_id)
## 
## FALSE  TRUE 
##    35  2033

We’re currently missing 35 matches. Investigate why we’re missing these. Below are the ids that do not appear in the Identifiers data but appear in the CommCare survey data:

d$id.Soil_Sample_Id[!d$id.Soil_Sample_Id %in% Identifiers$sample_id]
##  [1] "busia33787"            "rachuonyo40077"       
##  [3] "kakamegab(north)15878" "rongo33010"           
##  [5] "c1136"                 "rongo35982"           
##  [7] "gem45918"              "gem64358"             
##  [9] "c857"                  "rachuonyo42533"       
## [11] "rachuonyo39005"        "kakamegab(north)6413" 
## [13] "kakamegab(north)61463" "butere49984"          
## [15] "kisii43004"            "gem57424"             
## [17] "c703"                  "c290"                 
## [19] "busia34175"            "c241"                 
## [21] "webuye52641"           "c863"                 
## [23] "c706"                  "migori17557"          
## [25] "migori14465"           "rachuonyo16330"       
## [27] "matete54668"           "suneka30533"          
## [29] "c1121"                 "suneka16515"          
## [31] "lugari52284"           "kakamegab(north)63510"
## [33] "gem59393"              "rachuonyo22477"       
## [35] "c1176"
missing <- d$id.Soil_Sample_Id[!d$id.Soil_Sample_Id %in% Identifiers$sample_id]

Issues seem to be:

missingAll <- d[!d$id.Soil_Sample_Id %in% Identifiers$sample_id,]
write.csv(missingAll, paste(od, "missing_matches.csv", sep = "/"), row.names = F)

For the ids that should have an OAF id but don’t see if that oaf id exists in the Identifiers data.

missingId <- str_extract_all(missing,"\\(?[0-9,.]+\\)?")
missingId <- unlist(missingId)
table(missingId %in% Identifiers$oaf.ID)
## 
## FALSE  TRUE 
##    33     2
missingId[missingId %in% Identifiers$oaf.ID]
## [1] "61463" "63510"

These are the ids that appear in the Identifiers data but not in the CommCare surey data:

Identifiers$sample_id[!Identifiers$sample_id %in%  d$id.Soil_Sample_Id]
##  [1] "busia33789"            "kakamegab(north)15873"
##  [3] "butere44824"           "butere56493"          
##  [5] "butere59600"           "gem4598"              
##  [7] "gem37424"              "gem64558"             
##  [9] "c328"                  "c280"                 
## [11] "webuye1233"            "rachuonyo224777"      
## [13] "rachuonyo38459"        "rachuonyo163330"      
## [15] "rachuonyo425333"       "rachuonyo39009"       
## [17] "rachuonyo400777"       "suneka16575"          
## [19] "c1174"                 "rongo3310"            
## [21] "kisii3004"             "c627"                 
## [23] "migori7557"            "migori14342"          
## [25] "migori8936"            "migori14159"          
## [27] "migori7179"            "migori17315"          
## [29] "migori13503"           "c901"                 
## [31] "c894"                  "c892"                 
## [33] "c893"                  "c861"                 
## [35] "c862"                  "c895"                 
## [37] "c896"

Merge the data and drop non-merged obs

Come back to this. I don’t want to simply drop observations but we want to be working with full data for summaries and regressions.

d <- merge(d, Identifiers[,c("SSN", "sample_id")], by.x="id.Soil_Sample_Id", by.y="sample_id", all.x=TRUE)

d <- d[!is.na(d$SSN),]

Merge in the soil data

table(d$SSN %in% soil$SSN)
## 
## FALSE  TRUE 
##     1  2035
d <- merge(d, soil[,c(1,3,6:24)], by="SSN", all.x=TRUE)

Merge C and N predictions

table(d$SSN %in% cnP$SSN)
## 
## FALSE  TRUE 
##     1  2035
d <- merge(d, cnP, by="SSN", all.x=TRUE)
names(d)[names(d)=="OC_PLS1" | names(d)== "TN_PLS1"] <- c("Total.C", "Total.N")

Cleaning baseline variables

# take out weird CommCare stuff
d[d=="---"] <- NA

names(d) <- gsub("id.", "", names(d))
names(d) <- gsub("livestock.", "", names(d))
# seedtype to yield
names(d)[24:27] <- gsub("plot_information.", "", names(d)[24:27])

#inputs
names(d)[34:65] <- gsub("plot_information.", "", names(d)[34:65])

#intercrop
names(d)[28:33] <- gsub("plot_information.plot_information.","intercrop.",  names(d)[28:33])

names(d) <- gsub("historical.", "", names(d))
names(d) <- gsub("field_information.", "", names(d))

Recode variables to numeric

varlist <- c("seedkgs", "yield", "intercrop.seedkgs", "intercrop.yield",
             "inputs.dap_kg", "inputs.can_kg", "inputs.npk_kg", "inputs.urea_kg", "inputs.dap_kg_intercrop", "inputs.can_kg_intercrop",
             "inputs.npk_kg_intercrop", "inputs.urea_kg_intercrop",
             "inputs.lime_kg")

d[, varlist] <- sapply(d[,varlist], as.numeric)
d <- cbind(d, str_split_fixed(d$gps, " ", n=4))
names(d)[names(d)=="1" |names(d)== "2" | names(d)== "3" | names(d)== "4"] <- c("lat", "lon", "alt", "precision")
d[,c("lat", "lon", "alt", "precision")] <- sapply(
  d[,c("lat", "lon", "alt", "precision")],                                          function(x){as.numeric(as.character(x))})

Count the number of missing values in all soil variables

soilVars <- c("C.E.C", "Cu", "EC", "Exch.Al", "Hp", "K", "Mg", "Mn",
              "pH", "B", "Ca", "Fe", "Na", "P", "PSI", "S", "Zn", "Total.C",
              "Total.N")

naCount <- sapply(d[,soilVars], function(x){
  return(sum(is.na(x)))
}) 
naCount
##   C.E.C      Cu      EC Exch.Al      Hp       K      Mg      Mn      pH 
##       1       1       1       1       1       1       1       1       1 
##       B      Ca      Fe      Na       P     PSI       S      Zn Total.C 
##       1       1       1       1       1       1       1       1       1 
## Total.N 
##       1

Drop data with missing soil data - it’s not useful for later summaries and regressions

note: Come back and understand why we don’t have perfect matches.

d <- d[-which(is.na(d$Ca)),]
summary(d[,soilVars])
##      C.E.C              Cu                EC            Exch.Al        
##  Min.   : 2.614   Min.   : 0.8157   Min.   : 22.90   Min.   :0.002227  
##  1st Qu.: 7.047   1st Qu.: 2.1852   1st Qu.: 38.72   1st Qu.:0.049040  
##  Median : 9.567   Median : 3.0869   Median : 46.07   Median :0.085674  
##  Mean   :10.420   Mean   : 3.2745   Mean   : 47.52   Mean   :0.128068  
##  3rd Qu.:13.088   3rd Qu.: 4.2528   3rd Qu.: 54.15   3rd Qu.:0.142481  
##  Max.   :40.192   Max.   :10.5906   Max.   :101.09   Max.   :2.720619  
##        Hp                K                Mg               Mn        
##  Min.   :0.03122   Min.   : 35.39   Min.   : 31.16   Min.   : 40.01  
##  1st Qu.:0.22529   1st Qu.: 94.18   1st Qu.:108.72   1st Qu.:111.81  
##  Median :0.30606   Median :126.93   Median :157.62   Median :154.54  
##  Mean   :0.35452   Mean   :142.42   Mean   :181.78   Mean   :157.50  
##  3rd Qu.:0.42156   3rd Qu.:182.19   3rd Qu.:239.80   3rd Qu.:198.95  
##  Max.   :3.32301   Max.   :550.83   Max.   :579.63   Max.   :340.54  
##        pH              B                 Ca               Fe       
##  Min.   :4.379   Min.   :0.01646   Min.   : 138.8   Min.   : 58.5  
##  1st Qu.:5.573   1st Qu.:0.09394   1st Qu.: 628.2   1st Qu.:112.9  
##  Median :5.858   Median :0.14120   Median : 916.3   Median :132.2  
##  Mean   :5.876   Mean   :0.15279   Mean   :1064.3   Mean   :136.7  
##  3rd Qu.:6.149   3rd Qu.:0.19316   3rd Qu.:1303.7   3rd Qu.:156.7  
##  Max.   :8.784   Max.   :0.67119   Max.   :8246.8   Max.   :345.6  
##        Na              P                  PSI               S         
##  Min.   :17.59   Min.   :  0.05591   Min.   : 21.14   Min.   : 5.931  
##  1st Qu.:27.85   1st Qu.:  3.53380   1st Qu.: 87.12   1st Qu.:10.754  
##  Median :30.76   Median :  7.06593   Median :119.79   Median :12.646  
##  Mean   :31.53   Mean   : 11.98246   Mean   :125.76   Mean   :13.018  
##  3rd Qu.:34.56   3rd Qu.: 13.34762   3rd Qu.:157.05   3rd Qu.:14.852  
##  Max.   :58.39   Max.   :198.04271   Max.   :369.55   Max.   :29.903  
##        Zn            Total.C          Total.N       
##  Min.   : 1.682   Min.   :0.7858   Min.   :0.05101  
##  1st Qu.: 3.639   1st Qu.:1.7646   1st Qu.:0.12616  
##  Median : 4.511   Median :2.1194   Median :0.15722  
##  Mean   : 4.757   Mean   :2.1665   Mean   :0.15790  
##  3rd Qu.: 5.653   3rd Qu.:2.5581   3rd Qu.:0.18766  
##  Max.   :12.462   Max.   :4.5113   Max.   :0.31267

Check SiteName variable

#table(d$SiteName) many misspellings
d$SiteName <- tolower(d$SiteName)

Graphs of Kenya baseline soil variables

for(i in 1:length(soilVars)){
print(
  ggplot(data=d, aes(x=as.factor(oaf), y=d[,soilVars[i]])) + 
    geom_boxplot() +
    labs(x="One Acre Fund Farmer", y=soilVars[i], title = paste("Kenya baseline soil - ", soilVars[i], sep = ""))
  )  
}

pdf(paste(od, "ke_baseline_soil.pdf", sep = "/"), width=11, height=8.5)
print(
  ggplot(data=d, aes(x=as.factor(oaf), y=d[,soilVars[i]])) + 
    geom_boxplot() +
    labs(x="One Acre Fund Farmer", y=soilVars[i], title = paste("Kenya baseline soil - ", soilVars[i], sep = ""))
  )  
dev.off()
## quartz_off_screen 
##                 2

Soil chemical relationships

There are biologically predictable relationships between soil chemical characteristics. For instance, we expect Ca and Mg to move in the same direction and be positively correlated with pH. If we had Aluminum as an outcome, we’d expect pH to be negatively correlated with soluable aluminum. Let’s look quickly to confirm if those relationships are present:

rel1 <- ggplot(d, aes(x=Ca, y=Mg)) + geom_point() +
    stat_smooth(method = "loess") + 
    labs(x = "Calcium", y= "Magnesium", title="Calcium/Magnesium relationship")
print(rel1)

rel2 <- ggplot(d, aes(x=pH, y=Ca)) + geom_point() +
  stat_smooth(method = "loess") +
  labs(x = "pH", y="Calcium", title = "pH and Calcium relationship")
print(rel2)

rel3 <- ggplot(d, aes(x=pH, y=Exch.Al)) + geom_point() +
  stat_smooth(method = "loess") +
  labs(x = "pH", y="Exchangable Aluminum", title = "pH and Exch.Al relationship")
print(rel3)

rel4 <- ggplot(d, aes(x=Total.C, y=Total.N)) + geom_point() + 
  stat_smooth(method="loess") +
  labs(x = "Total Carbon", y="Total Nitrogen", title = "Carbon and Nitrogen relationship")


pdf(paste(od, "ke_baseline_soil_relationships.pdf", sep = "/"), width=11, height=8.5)
print(rel1) 
print(rel2)
print(rel3)
print(rel4)
dev.off()
## quartz_off_screen 
##                 2

Interpretation We see the predictable soil relationships we expected. This indicates that internally, our soil data are behaving in compliance with biological principles.

Save clean demographic and soil data to external file

write.csv(d, file=paste(dd, "shs ke baseline.csv", sep = "/"))
save(d, file=paste(dd, "shs ke baseline.Rdata", sep = "/"))

Summary statistics

Table of final baseline breakdown

count <- d %>% group_by(DistrictName) %>% 
  dplyr::summarize(
    t.count = sum(ifelse(oaf==1,1,0)),
    c.count = sum(ifelse(oaf==0,1,0)),
    total = n()
  ) %>% ungroup()

count <- as.data.frame(count)
write.csv(count, file=paste(od, "final ke sample breakdown.csv", sep="/"), row.names=F)
as.data.frame(count)
##          DistrictName t.count c.count total
## 1               Busia      43      45    88
## 2              Butere     116     119   235
## 3              Chwele      40      39    79
## 4                 Gem     134     135   269
## 5    Kakamega (South)      91      89   180
## 6  Kakamega B (North)      82      84   166
## 7               Kisii      47      50    97
## 8              Lugari      88      86   174
## 9              Matete      46      47    93
## 10             Migori      86      85   171
## 11          Rachuonyo      84      90   174
## 12              Rongo      78      78   156
## 13             Suneka      34      35    69
## 14             Webuye      42      42    84
d$valley <- ifelse(d$location=="valley", 1,0)
d$hilltop <- ifelse(d$location=="hilltop", 1,0)
d$hillside <- ifelse(d$location=="hillside", 1,0)
sort(prop.table(table(d$plot_information.maincrop, useNA = 'ifany')))
## 
##        sukuma    groundnuts        millet sweetpotatoes         beans 
##   0.001965602   0.002948403   0.002948403   0.003931204   0.004422604 
##       sorghum         sugar         other        fallow         maize 
##   0.005405405   0.006879607   0.017199017   0.023095823   0.931203931
d$crop.maize <- ifelse(d$plot_information.maincrop=="maize", 1,0)

Kgs of inputs per acre and hectare

Notes from Kalie: We should be dividing input quantity by land size. 100 kgs of inputs is the soft ceiling for OAF fields as we didn’t offer more than 100 kgs of fertilizer per acre. It’s also impossible for an OAF field to be smaller than 0.25 acres.

ggplot(d, aes(x=field_size, y=inputs.dap_kg)) + geom_point()
## Warning: Removed 479 rows containing missing values (geom_point).

This would mean however that if I want to get the actual amount they applied to their field to see that that quantity also makes sense, I would multiply by the size of the field.

d$dap.check <- d$inputs.dap_kg*d$field_size
ggplot(d, aes(x=dap.check)) + geom_density()
## Warning: Removed 479 rows containing non-finite values (stat_density).

Multiplying certainly keeps us in a more normal range. The scatter plot looks okay as well. The variance in per acre application rate increases with the size of the plot.

ggplot(d, aes(x=field_size, y=dap.check)) + geom_point()
## Warning: Removed 479 rows containing missing values (geom_point).

However, if the input quatity variable is already per plot size, as the codebook suggests, I’d want to divide by the size of the plot to get the per acre application rate. This is what Kalie suggests

d$dap.acre <- d$inputs.dap_kg/d$field_size
d$can.acre <- d$inputs.can_kg/d$field_size
d$npk.acre <- d$inputs.npk_kg/d$field_size
d$urea.acre <- d$inputs.urea_kg/d$field_size
d$compost.acre <- d$inputs.compost/d$field_size

acreInputs <- names(d)[grep("acre", names(d))]

Print out the calculation to make certain I’m doing this right:

#head(d[, c("inputs.dap_kg", "field_size", "dap.acre")])
d[which.max(d$dap.acre), c("inputs.dap_kg", "field_size", "dap.acre", "oaf")]
##     inputs.dap_kg field_size dap.acre oaf
## 831            47     0.0625      752   0
for(i in 1:length(acreInputs)){
  print(
  ggplot(data=d, aes(x=d[,acreInputs[i]])) +
    geom_histogram() +
    facet_wrap(~ oaf, scales='free') +
    labs(x=acreInputs[i], title = paste("Kenya input quantity per acre - ", acreInputs[i], sep = ""))
  )  
}
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 479 rows containing non-finite values (stat_bin).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1099 rows containing non-finite values (stat_bin).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1999 rows containing non-finite values (stat_bin).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1822 rows containing non-finite values (stat_bin).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

These values are totally unreasonable! These per acre (and per hectare) values seem too big. Let’s look at the per acre rates by field size

for(i in 1:length(acreInputs)){
print(
    ggplot(d, aes(x = field_size, y=d[, acreInputs[i]])) + 
    geom_point() + 
    labs(x = "Field Size in Acres", y=acreInputs[i], title = paste("Kenya input quantity by acreage - ", acreInputs[i], sep = ""))
  )
}
## Warning: Removed 479 rows containing missing values (geom_point).

## Warning: Removed 1099 rows containing missing values (geom_point).

## Warning: Removed 1999 rows containing missing values (geom_point).

## Warning: Removed 1822 rows containing missing values (geom_point).

Cleaning input variables

d$field_size.adj <- ifelse(d$oaf==1 & d$field_size<0.25, 0.25, d$field_size)

# drop really small field sizes >> they're inflating the application rates
d$field_size.adj <- ifelse(d$field_size<0.125, 0.125, d$field_size.adj)


# updated version of the input/acre variables - winsoring values to 100 kg.
d$dap.acre <- d$inputs.dap_kg/d$field_size.adj
d$dap.acre <- ifelse(d$dap.acre>100, 100, d$dap.acre)

d$can.acre <- d$inputs.can_kg/d$field_size.adj
d$can.acre <- ifelse(d$can.acre>100, 100, d$can.acre)

d$npk.acre <- d$inputs.npk_kg/d$field_size.adj
d$npk.acre <- ifelse(d$npk.acre>100, 100, d$npk.acre)

d$urea.acre <- d$inputs.urea_kg/d$field_size.adj
d$urea.acre <- ifelse(d$urea.acre>100, 100, d$urea.acre)

d$compost.are <- d$inputs.compost/d$field_size.adj
for(i in 1:length(acreInputs)){
print(
    ggplot(d, aes(x = field_size.adj, y=d[, acreInputs[i]], color=as.factor(oaf))) + 
    geom_point() + 
    #facet_wrap(~oaf) +
    labs(x = "Field Size in Acres", y=acreInputs[i], title = paste("Kenya input quantity by acreage - ", acreInputs[i], sep = ""))
)
}
## Warning: Removed 479 rows containing missing values (geom_point).

## Warning: Removed 1099 rows containing missing values (geom_point).

## Warning: Removed 1999 rows containing missing values (geom_point).

## Warning: Removed 1822 rows containing missing values (geom_point).

Kim says: While the values vary far more than we see in typical M&E data, we don’t have a good reference point for fertilizer application rates during the short rains, the period about which we asked. It’s conceivable that the fertilizer/acre quantity varies this much, even among OAF farmers. I’m going to go with that for now.

Baseline balance

d$own.cows <- ifelse(d$cows>0 & !is.na(d$cows), 1,0)
d$own.goats <- ifelse(d$goats>0 & !is.na(d$goats), 1,0)
d$own.chickens <- ifelse(d$chickens>0 & !is.na(d$chickens), 1,0)
d$own.pigs <- ifelse(d$pigs>0 & !is.na(d$pigs), 1,0)
d$own.sheep <- ifelse(d$sheep>0 & !is.na(d$sheep), 1,0)

Let’s see how balanced our farmers are in terms of demographic variables. One Acre Fund farmers were selected based on (list criteria) and control farmers in the same area tha fit the same criteria were also selected. No matching process has been performed to identify the control farmers that most closely resemble the One Acre Fund farmers in the sample.

names(d)[names(d)=="gender"] <- "female"

out.list <- c("female", "age", "cows", "goats", "chickens",
              "pigs", "sheep", "field_size", "yield", "dap.acre", "can.acre", "npk.acre", "urea.acre", "chemical_fertilizer_5", "compost_fertilizer_5", "lime_fertilizer_5",
"fertilizer_5", "legum_maincrop_5", "legum_intercrop_5", soilVars, "valley", "hilltop", "hillside", "crop.maize", "alt", "own.cows", "own.goats", "own.chickens", "own.pigs", "own.sheep")

d$seasons_oaf <- ifelse(d$seasons_oaf>8, NA, d$seasons_oaf)

              
output <- do.call(rbind, lapply(out.list, function(x) {
  
  out <- t.test(d[,x] ~ d[,"oaf"], data=d)
  tab <- data.frame(out[[5]][[2]],out[[5]][[1]], out[3])
  tab[,1:2] <- round(tab[,1:2],3)
  names(tab) <- c(names(out[[5]]), "pvalue")
  return(tab)
}))

# use p.adjust with bonferroni correction
output$pvalue <- p.adjust(output$pvalue, method="fdr")
rownames(output) <- out.list
output <- output[order(output$pvalue),]
output$pvalue <- ifelse(output[, 3] < 0.001, "< 0.001", round(output[, 3], 3)) 
colnames(output) <- c("1AF Client","Control", "p-value")    
print(kable(output))
1AF Client Control p-value
yield 57.479 49.790 < 0.001
chickens 11.247 7.953 < 0.001
own.cows 0.760 0.655 < 0.001
cows 2.111 1.656 < 0.001
own.chickens 0.908 0.857 0.004
can.acre 43.905 50.311 0.007
dap.acre 51.401 56.489 0.01
B 0.147 0.158 0.015
Ca 1021.661 1106.482 0.015
age 46.187 44.361 0.015
pH 5.845 5.907 0.015
own.goats 0.233 0.186 0.032
legum_intercrop_5 2.381 2.896 0.032
goats 0.555 0.422 0.034
own.pigs 0.073 0.048 0.053
female 0.706 0.660 0.076
EC 46.941 48.092 0.077
pigs 0.199 0.109 0.102
own.sheep 0.173 0.144 0.172
C.E.C 10.257 10.582 0.212
Hp 0.362 0.347 0.212
sheep 0.468 0.375 0.241
K 140.274 144.533 0.268
S 13.122 12.916 0.268
crop.maize 0.923 0.939 0.268
Mg 178.907 184.621 0.308
Exch.Al 0.132 0.124 0.581
field_size 0.540 0.526 0.681
npk.acre 33.733 27.557 0.752
compost_fertilizer_5 1.737 1.615 0.752
lime_fertilizer_5 -0.059 -0.174 0.752
fertilizer_5 -0.170 -0.012 0.752
Fe 137.280 136.182 0.752
Na 31.446 31.611 0.752
P 12.205 11.763 0.752
valley 0.718 0.729 0.8
Mn 158.096 156.909 0.803
hillside 0.257 0.248 0.803
PSI 126.270 125.265 0.829
chemical_fertilizer_5 3.247 3.176 0.889
legum_maincrop_5 0.076 0.130 0.889
Total.C 2.170 2.163 0.889
hilltop 0.025 0.023 0.948
urea.acre 42.139 42.028 0.992
Cu 3.273 3.276 0.992
Zn 4.757 4.758 0.992
Total.N 0.158 0.158 0.992
alt 1268.062 1268.290 0.992

Demographic variables:

Agricultural practice variables:

Soil Variables:

t10order <- c("age", "female")
table10vars <- paste(t10order, collapse="|")

ke.table10 <- output[grepl(table10vars, rownames(output)),]
ke.table10 <- ke.table10[order(match(rownames(ke.table10), t10order)),]
write.csv(ke.table10, file=paste(od, "pre match balance table10.csv", sep="/"),
          row.names = T)

t11order <- c("field_size", "alt", "hilltop", "hillside", "valley", "crop.maize")
table11vars <- paste(t11order, collapse="|")
ke.table11 <- output[grepl(table11vars, rownames(output)),]
ke.table11 <- ke.table11[order(match(rownames(ke.table11), t11order)),]
write.csv(ke.table11, file=paste(od, "pre match balance table 11.csv", sep = "/"),
          row.names=T)

t12order <- c("own.cows",  "cows", "own.pigs", "pigs", "own.sheep", "sheep", "own.goats", "goats", "own.chickens", "chickens")
table12vars <- paste(t12order, collapse="|")
ke.table12 <- output[grepl(table12vars, rownames(output)),]
ke.table12 <- ke.table12[order(match(rownames(ke.table12), t12order)),]
write.csv(ke.table12, file=paste(od, "pre match balance table 12.csv", sep = "/"),
          row.names=T)
#write table
write.csv(output, file=paste(od, "ke baseline balance.csv", sep="/"), row.names=T)

Baseline balance by district

I remove intercrop elements from out.list so that the code runs properly. I have to remove all fertilizer comparisons. CAN fails in Suneka. The others fail across districts due to insuffient observations.

toMatch <- c("npk", "urea", "g_intercrop", "can")
distList <- out.list[!out.list %in% unique(grep(paste(toMatch, collapse="|"), out.list, value=TRUE))]


dist.output <- do.call(rbind, lapply(split(d, d$DistrictName), function(x) {
  
  tab <- do.call(rbind, lapply(distList, function(y) {
    out <- t.test(x[,y] ~ x[,"oaf"], data=x)
    tab <- data.frame(out[[5]][[2]], out[[5]][[1]], out[3])
    tab[,1:2] <- round(tab[,1:2],3)
    names(tab) <- c(names(out[[5]]), "pvalue")
    return(tab)
  }))
  
  return(data.frame(district = unique(x$DistrictName), tab))
}))

rownames(dist.output) <- NULL
dist.output$variable <- rep(distList,length(unique(d$DistrictName)))    

# order variables 
dist.output <- dist.output[, c(1, 5, 2:4)]
dist.output$pvalue <- p.adjust(dist.output$pvalue, method="fdr")
dist.output <- dist.output[order(dist.output$pvalue),]

dist.output$pvalue <- ifelse(dist.output$pvalue < 0.001, "< 0.001", round(dist.output$pvalue,3))
colnames(dist.output) <- c("District", "Varible", "1AF Client","Control", "p-value")    
print(kable(dist.output))
District Varible 1AF Client Control p-value
199 Kakamega (South) EC 38.026 44.894 < 0.001
257 Kakamega B (North) S 11.420 10.092 < 0.001
466 Rachuonyo legum_intercrop_5 1.202 2.478 < 0.001
209 Kakamega (South) Na 30.142 32.164 < 0.001
213 Kakamega (South) Zn 4.423 5.126 < 0.001
202 Kakamega (South) K 104.804 131.847 0.006
197 Kakamega (South) C.E.C 8.146 9.899 0.009
244 Kakamega B (North) EC 46.863 54.240 0.009
617 Webuye S 12.120 10.849 0.01
536 Rongo own.cows 0.821 0.551 0.015
203 Kakamega (South) Mg 136.679 168.831 0.017
212 Kakamega (South) S 16.017 14.313 0.017
514 Rongo EC 61.226 54.433 0.018
54 Butere yield 60.086 48.496 0.021
205 Kakamega (South) pH 5.714 5.945 0.021
251 Kakamega B (North) B 0.152 0.198 0.021
55 Butere dap.acre 48.163 65.737 0.021
207 Kakamega (South) Ca 777.513 1002.114 0.025
189 Kakamega (South) yield 60.187 47.404 0.029
279 Kisii yield 51.022 35.080 0.029
258 Kakamega B (North) Zn 4.207 4.785 0.031
206 Kakamega (South) B 0.106 0.146 0.037
572 Suneka S 12.862 14.286 0.038
93 Chwele cows 2.300 1.051 0.057
483 Rachuonyo Zn 5.316 4.501 0.059
559 Suneka EC 54.096 47.497 0.059
88 Butere own.chickens 0.931 0.798 0.064
131 Chwele own.cows 0.800 0.487 0.075
101 Chwele chemical_fertilizer_5 4.100 2.974 0.089
250 Kakamega B (North) pH 5.829 6.055 0.09
360 Lugari own.sheep 0.364 0.174 0.09
498 Rongo cows 2.372 1.487 0.09
242 Kakamega B (North) C.E.C 9.470 11.305 0.1
458 Rachuonyo field_size 0.497 0.396 0.111
556 Suneka legum_intercrop_5 3.324 4.343 0.113
241 Kakamega B (North) legum_intercrop_5 2.805 3.583 0.131
255 Kakamega B (North) P 10.114 14.049 0.131
500 Rongo chickens 9.897 5.551 0.131
557 Suneka C.E.C 12.912 11.025 0.131
562 Suneka K 186.138 156.538 0.131
39 Busia crop.maize 0.767 0.956 0.161
254 Kakamega B (North) Na 28.847 30.348 0.161
320 Lugari chickens 13.341 8.767 0.161
474 Rachuonyo Mn 207.654 183.951 0.161
79 Butere Total.C 1.882 2.030 0.163
84 Butere crop.maize 0.948 1.000 0.166
144 Gem yield 53.281 45.119 0.166
234 Kakamega B (North) yield 69.341 59.321 0.166
247 Kakamega B (North) K 124.058 144.309 0.166
280 Kisii dap.acre 45.537 63.035 0.166
322 Lugari sheep 1.091 0.442 0.166
527 Rongo S 11.638 12.507 0.166
548 Suneka field_size 0.445 0.305 0.166
550 Suneka dap.acre 53.032 72.151 0.166
248 Kakamega B (North) Mg 150.277 178.955 0.174
615 Webuye P 6.752 10.124 0.174
334 Lugari EC 46.502 50.408 0.176
142 Gem sheep 0.761 0.333 0.192
198 Kakamega (South) Cu 3.401 3.705 0.192
228 Kakamega B (North) cows 2.354 1.702 0.192
566 Suneka B 0.178 0.149 0.192
575 Suneka Total.N 0.191 0.175 0.192
210 Kakamega (South) P 4.821 6.378 0.195
221 Kakamega (South) own.cows 0.824 0.674 0.195
341 Lugari B 0.169 0.200 0.198
604 Webuye EC 42.309 47.568 0.202
567 Suneka Ca 1197.599 988.904 0.221
252 Kakamega B (North) Ca 952.727 1186.483 0.225
416 Migori chemical_fertilizer_5 2.826 2.106 0.226
230 Kakamega B (North) chickens 10.683 7.702 0.228
419 Migori fertilizer_5 0.081 0.271 0.234
614 Webuye Na 27.217 29.037 0.237
180 Gem own.sheep 0.246 0.141 0.239
499 Rongo goats 1.026 0.551 0.239
5 Busia chickens 17.093 8.178 0.243
318 Lugari cows 2.045 1.291 0.243
179 Gem own.pigs 0.052 0.007 0.249
563 Suneka Mg 230.982 196.650 0.249
452 Rachuonyo age 48.381 44.611 0.254
92 Chwele age 46.625 39.410 0.258
9 Busia yield 58.244 46.023 0.291
459 Rachuonyo yield 52.155 45.178 0.291
537 Rongo own.goats 0.397 0.244 0.294
50 Butere chickens 10.879 7.941 0.294
87 Butere own.goats 0.207 0.109 0.294
611 Webuye B 0.158 0.192 0.308
359 Lugari own.pigs 0.045 0.000 0.313
551 Suneka chemical_fertilizer_5 4.118 4.771 0.313
375 Matete legum_maincrop_5 1.283 0.681 0.315
226 Kakamega B (North) female 0.720 0.571 0.316
149 Gem fertilizer_5 0.493 0.222 0.319
574 Suneka Total.C 2.642 2.425 0.319
339 Lugari Mn 119.181 108.006 0.343
141 Gem pigs 0.150 0.007 0.345
185 Kakamega (South) chickens 8.890 6.820 0.353
525 Rongo P 35.577 24.612 0.361
321 Lugari pigs 0.068 0.000 0.362
140 Gem chickens 11.739 8.400 0.365
224 Kakamega (South) own.pigs 0.231 0.124 0.371
528 Rongo Zn 6.361 5.807 0.371
80 Butere Total.N 0.133 0.140 0.384
86 Butere own.cows 0.741 0.630 0.398
356 Lugari own.cows 0.693 0.558 0.398
269 Kakamega B (North) own.pigs 0.110 0.036 0.402
275 Kisii chickens 9.106 6.020 0.435
2 Busia age 44.465 39.222 0.446
174 Gem crop.maize 0.910 0.963 0.446
219 Kakamega (South) crop.maize 1.000 0.966 0.472
115 Chwele pH 6.040 6.306 0.474
232 Kakamega B (North) sheep 0.195 0.048 0.485
35 Busia Total.N 0.174 0.160 0.486
201 Kakamega (South) Hp 0.497 0.432 0.486
231 Kakamega B (North) pigs 0.244 0.048 0.486
407 Migori age 47.512 43.529 0.486
49 Butere goats 0.431 0.235 0.49
414 Migori yield 54.430 47.659 0.5
479 Rachuonyo Na 35.768 34.687 0.5
133 Chwele own.chickens 0.875 0.974 0.503
453 Rachuonyo cows 2.155 1.689 0.504
34 Busia Total.C 2.162 2.012 0.552
146 Gem chemical_fertilizer_5 3.254 3.622 0.552
196 Kakamega (South) legum_intercrop_5 2.385 4.247 0.552
410 Migori chickens 12.163 9.153 0.552
246 Kakamega B (North) Hp 0.311 0.267 0.553
270 Kakamega B (North) own.sheep 0.098 0.036 0.553
117 Chwele Ca 851.837 1296.267 0.554
204 Kakamega (South) Mn 179.496 187.604 0.554
349 Lugari Total.C 2.288 2.164 0.554
361 Matete female 0.804 0.660 0.554
376 Matete legum_intercrop_5 3.022 3.553 0.554
74 Butere Na 28.870 29.641 0.562
454 Rachuonyo goats 0.905 0.567 0.562
421 Migori legum_intercrop_5 0.721 1.106 0.566
43 Busia own.chickens 0.930 0.822 0.575
273 Kisii cows 1.851 1.400 0.586
347 Lugari S 11.077 10.692 0.586
462 Rachuonyo compost_fertilizer_5 0.952 1.356 0.586
468 Rachuonyo Cu 4.410 4.093 0.586
75 Butere P 11.216 13.972 0.608
1 Busia female 0.558 0.711 0.61
569 Suneka Na 34.168 32.524 0.61
627 Webuye own.goats 0.143 0.048 0.61
136 Gem female 0.761 0.681 0.621
448 Migori own.chickens 0.953 0.894 0.621
589 Webuye goats 0.262 0.071 0.622
402 Matete own.goats 0.217 0.106 0.631
82 Butere hilltop 0.000 0.017 0.632
109 Chwele EC 45.541 49.370 0.632
170 Gem Total.N 0.146 0.152 0.632
182 Kakamega (South) age 48.769 51.955 0.632
243 Kakamega B (North) Cu 2.469 2.682 0.632
480 Rachuonyo P 10.026 7.528 0.632
596 Webuye chemical_fertilizer_5 4.262 3.762 0.632
610 Webuye pH 5.823 5.987 0.632
151 Gem legum_intercrop_5 3.216 3.548 0.633
154 Gem EC 39.355 38.185 0.633
616 Webuye PSI 125.782 113.467 0.645
72 Butere Ca 672.203 733.903 0.655
289 Kisii EC 51.686 53.946 0.655
358 Lugari own.chickens 0.898 0.826 0.655
266 Kakamega B (North) own.cows 0.866 0.786 0.661
305 Kisii Total.N 0.222 0.229 0.661
408 Migori cows 2.709 2.141 0.661
478 Rachuonyo Fe 135.521 128.348 0.661
237 Kakamega B (North) compost_fertilizer_5 1.866 0.190 0.662
176 Gem own.cows 0.672 0.593 0.665
364 Matete goats 0.543 0.255 0.671
107 Chwele C.E.C 7.433 9.089 0.674
164 Gem Na 29.773 29.227 0.674
367 Matete sheep 0.087 0.319 0.674
491 Rachuonyo own.cows 0.798 0.711 0.674
607 Webuye K 117.889 132.467 0.674
612 Webuye Ca 875.015 1030.262 0.674
348 Lugari Zn 3.924 3.713 0.681
70 Butere pH 5.530 5.604 0.688
473 Rachuonyo Mg 265.564 246.873 0.688
486 Rachuonyo valley 0.560 0.656 0.688
565 Suneka pH 5.772 5.659 0.688
593 Webuye field_size 0.545 0.653 0.688
186 Kakamega (South) pigs 0.769 0.337 0.708
463 Rachuonyo lime_fertilizer_5 0.107 0.033 0.708
16 Busia legum_intercrop_5 2.279 2.800 0.71
57 Butere compost_fertilizer_5 2.276 1.933 0.71
121 Chwele PSI 58.827 51.199 0.71
431 Migori B 0.188 0.174 0.71
437 Migori S 12.367 12.757 0.71
573 Suneka Zn 5.966 5.538 0.71
451 Rachuonyo female 0.679 0.589 0.723
374 Matete fertilizer_5 0.239 0.532 0.728
67 Butere K 98.236 104.079 0.735
68 Butere Mg 123.018 131.436 0.735
521 Rongo B 0.164 0.149 0.735
313 Kisii own.chickens 0.872 0.780 0.739
404 Matete own.pigs 0.109 0.043 0.739
27 Busia Ca 969.547 1141.030 0.745
183 Kakamega (South) cows 1.967 1.685 0.745
302 Kisii S 14.119 13.761 0.745
379 Matete EC 43.147 45.389 0.745
138 Gem cows 1.970 1.659 0.748
223 Kakamega (South) own.chickens 0.901 0.843 0.748
28 Busia Fe 130.328 122.565 0.758
33 Busia Zn 5.128 4.779 0.758
37 Busia hilltop 0.000 0.022 0.758
42 Busia own.goats 0.256 0.156 0.758
48 Butere cows 2.017 1.748 0.758
58 Butere lime_fertilizer_5 0.017 0.000 0.758
60 Butere legum_maincrop_5 0.181 0.118 0.758
83 Butere hillside 0.009 0.000 0.758
94 Chwele goats 0.275 0.487 0.758
102 Chwele compost_fertilizer_5 1.950 1.436 0.758
111 Chwele Hp 0.243 0.206 0.758
122 Chwele S 9.807 9.423 0.758
127 Chwele hilltop 0.025 0.000 0.758
137 Gem age 47.642 45.556 0.758
169 Gem Total.C 1.811 1.868 0.758
181 Kakamega (South) female 0.736 0.663 0.758
188 Kakamega (South) field_size 0.544 0.485 0.758
193 Kakamega (South) lime_fertilizer_5 0.022 0.090 0.758
194 Kakamega (South) fertilizer_5 -1.044 0.034 0.758
214 Kakamega (South) Total.C 2.035 2.106 0.758
227 Kakamega B (North) age 45.805 43.917 0.758
236 Kakamega B (North) chemical_fertilizer_5 4.244 2.869 0.758
238 Kakamega B (North) lime_fertilizer_5 0.085 -1.179 0.758
253 Kakamega B (North) Fe 124.110 120.553 0.758
260 Kakamega B (North) Total.N 0.157 0.151 0.758
262 Kakamega B (North) hilltop 0.012 0.000 0.758
267 Kakamega B (North) own.goats 0.280 0.214 0.758
284 Kisii fertilizer_5 0.106 0.240 0.758
293 Kisii Mg 275.137 290.799 0.758
296 Kisii B 0.210 0.225 0.758
317 Lugari age 46.148 43.942 0.758
325 Lugari dap.acre 48.967 44.422 0.758
337 Lugari K 131.120 139.581 0.758
343 Lugari Fe 123.110 118.683 0.758
369 Matete yield 62.239 56.717 0.758
373 Matete lime_fertilizer_5 0.000 0.021 0.758
395 Matete Total.N 0.145 0.137 0.758
400 Matete alt 1546.540 1596.905 0.758
405 Matete own.sheep 0.065 0.128 0.758
406 Migori female 0.581 0.506 0.758
417 Migori compost_fertilizer_5 1.686 2.024 0.758
418 Migori lime_fertilizer_5 0.023 0.000 0.758
429 Migori Mn 182.830 176.837 0.758
433 Migori Fe 169.676 174.534 0.758
455 Rachuonyo chickens 9.000 7.622 0.758
465 Rachuonyo legum_maincrop_5 0.119 0.067 0.758
467 Rachuonyo C.E.C 13.759 13.063 0.758
487 Rachuonyo hilltop 0.036 0.011 0.758
505 Rongo dap.acre 46.444 53.115 0.758
524 Rongo Na 36.941 35.865 0.758
529 Rongo Total.C 2.304 2.221 0.758
545 Suneka chickens 12.529 5.829 0.758
547 Suneka sheep 0.000 0.029 0.758
553 Suneka lime_fertilizer_5 0.059 0.000 0.758
577 Suneka hilltop 0.029 0.000 0.758
585 Suneka own.sheep 0.000 0.029 0.758
586 Webuye female 0.738 0.833 0.758
591 Webuye pigs 0.000 0.024 0.758
594 Webuye yield 68.143 62.905 0.758
599 Webuye fertilizer_5 0.119 0.000 0.758
601 Webuye legum_intercrop_5 3.167 3.619 0.758
608 Webuye Mg 145.766 162.641 0.758
624 Webuye crop.maize 1.000 0.976 0.758
629 Webuye own.pigs 0.000 0.024 0.758
602 Webuye C.E.C 9.007 9.813 0.766
488 Rachuonyo hillside 0.405 0.333 0.766
287 Kisii C.E.C 13.942 14.492 0.776
450 Migori own.sheep 0.209 0.153 0.776
618 Webuye Zn 3.855 4.111 0.776
620 Webuye Total.N 0.161 0.154 0.776
295 Kisii pH 5.764 5.842 0.778
40 Busia alt 1198.566 1225.322 0.78
62 Butere C.E.C 7.720 8.072 0.78
112 Chwele K 103.957 113.435 0.78
159 Gem Mn 166.868 171.951 0.78
239 Kakamega B (North) fertilizer_5 -1.049 0.071 0.78
278 Kisii field_size 0.343 0.309 0.78
323 Lugari field_size 0.688 0.753 0.78
415 Migori dap.acre 54.533 61.589 0.78
422 Migori C.E.C 15.104 14.547 0.78
427 Migori K 215.241 207.129 0.78
446 Migori own.cows 0.791 0.729 0.78
482 Rachuonyo S 12.912 13.322 0.78
517 Rongo K 182.358 171.577 0.78
4 Busia goats 0.535 0.333 0.78
18 Busia Cu 4.016 3.715 0.78
19 Busia EC 47.956 45.685 0.78
41 Busia own.cows 0.651 0.556 0.78
89 Butere own.pigs 0.034 0.059 0.78
104 Chwele fertilizer_5 0.225 0.077 0.78
191 Kakamega (South) chemical_fertilizer_5 4.527 4.348 0.78
195 Kakamega (South) legum_maincrop_5 -0.736 0.247 0.78
259 Kakamega B (North) Total.C 2.248 2.182 0.78
291 Kisii Hp 0.478 0.413 0.78
503 Rongo field_size 0.484 0.452 0.78
510 Rongo legum_maincrop_5 0.359 0.462 0.78
561 Suneka Hp 0.428 0.478 0.78
570 Suneka P 9.322 7.719 0.78
542 Suneka age 46.235 43.114 0.783
439 Migori Total.C 2.637 2.698 0.795
512 Rongo C.E.C 12.273 11.640 0.795
605 Webuye Exch.Al 0.088 0.102 0.795
588 Webuye cows 2.119 1.786 0.799
25 Busia pH 5.906 5.997 0.802
382 Matete K 114.570 122.255 0.82
73 Butere Fe 141.381 144.766 0.828
126 Chwele valley 0.900 0.949 0.828
345 Lugari P 11.787 10.411 0.828
595 Webuye dap.acre 53.223 47.324 0.828
606 Webuye Hp 0.291 0.269 0.828
200 Kakamega (South) Exch.Al 0.176 0.151 0.831
15 Busia legum_maincrop_5 0.023 0.067 0.832
116 Chwele B 0.134 0.150 0.832
190 Kakamega (South) dap.acre 51.854 55.610 0.832
393 Matete Zn 4.093 4.256 0.832
461 Rachuonyo chemical_fertilizer_5 2.702 2.889 0.832
492 Rachuonyo own.goats 0.321 0.267 0.832
493 Rachuonyo own.chickens 0.845 0.800 0.832
506 Rongo chemical_fertilizer_5 2.949 3.179 0.832
540 Rongo own.sheep 0.179 0.231 0.832
582 Suneka own.goats 0.147 0.086 0.832
592 Webuye sheep 0.095 0.190 0.832
65 Butere Exch.Al 0.228 0.196 0.835
113 Chwele Mg 107.356 117.334 0.835
211 Kakamega (South) PSI 145.493 140.141 0.835
256 Kakamega B (North) PSI 125.869 121.524 0.835
377 Matete C.E.C 8.499 9.037 0.835
394 Matete Total.C 2.101 2.020 0.835
504 Rongo yield 48.329 45.039 0.835
530 Rongo Total.N 0.151 0.147 0.835
619 Webuye Total.C 2.106 2.015 0.835
444 Migori crop.maize 0.907 0.871 0.835
20 Busia Exch.Al 0.124 0.104 0.839
143 Gem field_size 0.474 0.447 0.84
464 Rachuonyo fertilizer_5 0.417 0.311 0.84
470 Rachuonyo Exch.Al 0.095 0.106 0.84
46 Butere female 0.733 0.689 0.841
32 Busia S 14.130 13.689 0.847
118 Chwele Fe 135.330 130.891 0.848
309 Kisii crop.maize 0.851 0.900 0.851
71 Butere B 0.102 0.107 0.855
95 Chwele chickens 10.025 8.385 0.855
261 Kakamega B (North) valley 0.829 0.869 0.855
172 Gem hilltop 0.022 0.037 0.856
178 Gem own.chickens 0.940 0.919 0.856
274 Kisii goats 0.702 0.540 0.856
403 Matete own.chickens 0.935 0.894 0.856
471 Rachuonyo Hp 0.290 0.307 0.856
535 Rongo alt 1283.056 1232.582 0.856
538 Rongo own.chickens 0.885 0.846 0.856
21 Busia Hp 0.330 0.307 0.859
173 Gem hillside 0.358 0.319 0.859
389 Matete Na 29.027 29.532 0.859
520 Rongo pH 5.899 5.856 0.859
114 Chwele Mn 92.440 96.506 0.86
168 Gem Zn 4.140 4.232 0.86
423 Migori Cu 4.040 3.945 0.86
222 Kakamega (South) own.goats 0.143 0.180 0.863
240 Kakamega B (North) legum_maincrop_5 0.622 0.488 0.863
294 Kisii Mn 221.671 215.785 0.863
558 Suneka Cu 4.218 4.054 0.863
11 Busia chemical_fertilizer_5 0.767 2.333 0.871
167 Gem S 15.437 15.655 0.871
229 Kakamega B (North) goats 0.500 0.405 0.871
292 Kisii K 212.583 218.497 0.871
383 Matete Mg 133.007 142.882 0.871
64 Butere EC 40.878 41.683 0.874
225 Kakamega (South) own.sheep 0.055 0.079 0.874
312 Kisii own.goats 0.319 0.260 0.874
472 Rachuonyo K 191.338 185.835 0.874
428 Migori Mg 291.552 283.010 0.875
526 Rongo PSI 92.332 95.860 0.875
625 Webuye alt 1540.877 1568.134 0.875
66 Butere Hp 0.445 0.426 0.883
215 Kakamega (South) Total.N 0.156 0.160 0.883
365 Matete chickens 13.674 11.894 0.883
387 Matete Ca 809.328 858.827 0.883
352 Lugari hilltop 0.011 0.023 0.894
380 Matete Exch.Al 0.119 0.109 0.895
119 Chwele Na 30.915 31.522 0.897
249 Kakamega B (North) Mn 130.041 133.470 0.901
541 Suneka female 0.529 0.600 0.901
175 Gem alt 1193.128 1166.167 0.901
344 Lugari Na 28.192 27.857 0.901
391 Matete PSI 111.212 105.877 0.901
409 Migori goats 1.070 0.906 0.901
497 Rongo age 42.590 41.397 0.901
549 Suneka yield 50.529 47.086 0.901
554 Suneka fertilizer_5 0.147 0.086 0.901
81 Butere valley 0.991 0.983 0.903
106 Chwele legum_intercrop_5 3.175 2.923 0.903
342 Lugari Ca 946.378 988.467 0.903
386 Matete B 0.144 0.151 0.903
424 Migori EC 57.572 56.792 0.903
434 Migori Na 37.542 37.136 0.903
148 Gem lime_fertilizer_5 0.045 0.022 0.908
6 Busia pigs 1.047 0.844 0.911
52 Butere sheep 0.371 0.487 0.911
78 Butere Zn 4.290 4.364 0.911
108 Chwele Cu 1.812 1.875 0.911
272 Kisii age 45.809 44.420 0.911
392 Matete S 11.442 11.164 0.911
490 Rachuonyo alt 1359.050 1311.449 0.911
507 Rongo compost_fertilizer_5 0.692 0.821 0.911
475 Rachuonyo pH 6.182 6.144 0.912
63 Butere Cu 2.609 2.530 0.912
125 Chwele Total.N 0.103 0.099 0.912
59 Butere fertilizer_5 0.095 0.134 0.913
263 Kakamega B (North) hillside 0.159 0.131 0.913
324 Lugari yield 58.636 60.395 0.913
370 Matete dap.acre 48.412 52.225 0.913
390 Matete P 12.972 11.862 0.913
445 Migori alt 1275.836 1310.417 0.913
552 Suneka compost_fertilizer_5 0.824 1.029 0.913
129 Chwele crop.maize 0.950 0.923 0.925
135 Chwele own.sheep 0.050 0.077 0.925
153 Gem Cu 3.353 3.407 0.925
277 Kisii sheep 0.043 0.020 0.925
290 Kisii Exch.Al 0.161 0.139 0.925
371 Matete chemical_fertilizer_5 4.239 4.106 0.925
440 Migori Total.N 0.177 0.179 0.931
590 Webuye chickens 11.524 9.548 0.937
378 Matete Cu 2.336 2.404 0.938
420 Migori legum_maincrop_5 0.140 0.188 0.939
609 Webuye Mn 131.760 128.340 0.939
351 Lugari valley 0.807 0.779 0.941
30 Busia P 8.264 7.408 0.943
447 Migori own.goats 0.326 0.294 0.943
316 Lugari female 0.773 0.744 0.946
128 Chwele hillside 0.075 0.051 0.947
171 Gem valley 0.619 0.644 0.947
265 Kakamega B (North) alt 1012.737 1054.354 0.947
304 Kisii Total.C 2.845 2.877 0.947
306 Kisii valley 0.277 0.240 0.947
308 Kisii hillside 0.723 0.760 0.947
332 Lugari C.E.C 9.406 9.648 0.947
366 Matete pigs 0.283 0.191 0.947
368 Matete field_size 0.609 0.652 0.947
397 Matete hilltop 0.087 0.064 0.947
425 Migori Exch.Al 0.067 0.071 0.947
430 Migori pH 6.186 6.212 0.947
489 Rachuonyo crop.maize 0.929 0.944 0.947
515 Rongo Exch.Al 0.114 0.108 0.947
597 Webuye compost_fertilizer_5 1.810 2.000 0.947
31 Busia PSI 148.837 144.400 0.951
124 Chwele Total.C 1.534 1.496 0.951
310 Kisii alt 1530.459 1571.339 0.951
319 Lugari goats 0.091 0.128 0.951
481 Rachuonyo PSI 141.108 145.216 0.951
560 Suneka Exch.Al 0.166 0.183 0.951
630 Webuye own.sheep 0.071 0.095 0.951
298 Kisii Fe 129.242 127.516 0.955
300 Kisii P 2.809 2.614 0.957
544 Suneka goats 0.235 0.343 0.957
23 Busia Mg 176.760 183.696 0.957
56 Butere chemical_fertilizer_5 2.431 2.765 0.957
156 Gem Hp 0.410 0.400 0.957
166 Gem PSI 128.096 130.272 0.957
245 Kakamega B (North) Exch.Al 0.107 0.100 0.957
281 Kisii chemical_fertilizer_5 2.298 2.120 0.957
282 Kisii compost_fertilizer_5 1.255 1.120 0.957
388 Matete Fe 126.753 128.220 0.957
399 Matete crop.maize 0.935 0.915 0.957
511 Rongo legum_intercrop_5 1.538 1.449 0.957
519 Rongo Mn 150.975 154.477 0.957
581 Suneka own.cows 0.706 0.743 0.968
47 Butere age 44.595 44.008 0.977
155 Gem Exch.Al 0.150 0.145 0.977
8 Busia field_size 0.703 0.733 0.978
10 Busia dap.acre 37.975 40.215 0.978
14 Busia fertilizer_5 0.302 0.356 0.978
17 Busia C.E.C 10.515 10.782 0.978
29 Busia Na 29.657 29.921 0.978
36 Busia valley 0.907 0.889 0.978
51 Butere pigs 0.086 0.101 0.978
77 Butere S 14.108 13.954 0.978
85 Butere alt 1273.988 1263.968 0.978
97 Chwele sheep 0.125 0.103 0.978
99 Chwele yield 61.175 59.231 0.978
100 Chwele dap.acre 45.398 47.258 0.978
103 Chwele lime_fertilizer_5 0.175 0.128 0.978
110 Chwele Exch.Al 0.076 0.070 0.978
157 Gem K 104.789 103.416 0.978
160 Gem pH 5.802 5.791 0.978
162 Gem Ca 785.984 795.618 0.978
177 Gem own.goats 0.224 0.237 0.978
184 Kakamega (South) goats 0.330 0.303 0.978
187 Kakamega (South) sheep 0.154 0.191 0.978
192 Kakamega (South) compost_fertilizer_5 3.088 3.157 0.978
216 Kakamega (South) valley 0.648 0.663 0.978
218 Kakamega (South) hillside 0.308 0.292 0.978
220 Kakamega (South) alt 1411.321 1399.184 0.978
233 Kakamega B (North) field_size 0.671 0.690 0.978
235 Kakamega B (North) dap.acre 47.820 48.786 0.978
268 Kakamega B (North) own.chickens 0.939 0.929 0.978
271 Kisii female 0.745 0.720 0.978
288 Kisii Cu 5.101 5.146 0.978
297 Kisii Ca 1375.107 1398.179 0.978
299 Kisii Na 33.998 33.832 0.978
303 Kisii Zn 5.301 5.373 0.978
311 Kisii own.cows 0.745 0.720 0.978
340 Lugari pH 5.787 5.774 0.978
346 Lugari PSI 128.062 126.435 0.978
350 Lugari Total.N 0.160 0.159 0.978
353 Lugari hillside 0.182 0.198 0.978
372 Matete compost_fertilizer_5 1.783 1.915 0.978
381 Matete Hp 0.335 0.327 0.978
384 Matete Mn 126.124 127.453 0.978
385 Matete pH 5.741 5.765 0.978
413 Migori field_size 0.471 0.458 0.978
426 Migori Hp 0.257 0.262 0.978
432 Migori Ca 1692.366 1723.831 0.978
435 Migori P 19.144 19.842 0.978
438 Migori Zn 6.479 6.544 0.978
442 Migori hilltop 0.081 0.071 0.978
443 Migori hillside 0.442 0.459 0.978
476 Rachuonyo B 0.179 0.182 0.978
484 Rachuonyo Total.C 2.290 2.273 0.978
495 Rachuonyo own.sheep 0.440 0.422 0.978
513 Rongo Cu 3.522 3.474 0.978
518 Rongo Mg 217.827 213.987 0.978
531 Rongo valley 0.833 0.821 0.978
533 Rongo hillside 0.167 0.179 0.978
571 Suneka PSI 144.626 142.453 0.978
578 Suneka hillside 0.206 0.229 0.978
580 Suneka alt 1368.148 1338.315 0.978
583 Suneka own.chickens 0.794 0.771 0.978
626 Webuye own.cows 0.762 0.738 0.978
123 Chwele Zn 3.531 3.571 0.983
502 Rongo sheep 0.513 0.551 0.983
534 Rongo crop.maize 0.756 0.769 0.983
44 Busia own.pigs 0.442 0.422 0.983
331 Lugari legum_intercrop_5 2.716 3.023 0.983
477 Rachuonyo Ca 1487.987 1507.098 0.983
286 Kisii legum_intercrop_5 1.957 1.880 0.992
3 Busia cows 1.512 1.578 0.992
335 Lugari Exch.Al 0.096 0.099 0.993
76 Butere PSI 113.234 112.391 0.994
130 Chwele alt 955.358 937.070 0.997
158 Gem Mg 138.339 139.362 0.997
355 Lugari alt 1077.900 1095.638 0.997
469 Rachuonyo EC 52.448 52.215 0.997
603 Webuye Cu 2.726 2.699 0.997
7 Busia sheep 0.093 0.089 1
12 Busia compost_fertilizer_5 1.372 1.378 1
22 Busia K 130.795 132.082 1
24 Busia Mn 174.610 175.526 1
26 Busia B 0.159 0.160 1
38 Busia hillside 0.093 0.089 1
45 Busia own.sheep 0.070 0.067 1
53 Butere field_size 0.536 0.530 1
61 Butere legum_intercrop_5 2.629 2.622 1
69 Butere Mn 135.295 136.073 1
90 Butere own.sheep 0.138 0.134 1
91 Chwele female 0.725 0.718 1
96 Chwele pigs 0.425 0.436 1
98 Chwele field_size 0.575 0.574 1
105 Chwele legum_maincrop_5 0.050 0.051 1
120 Chwele P 25.547 25.175 1
132 Chwele own.goats 0.225 0.231 1
134 Chwele own.pigs 0.125 0.128 1
139 Gem goats 0.545 0.541 1
145 Gem dap.acre 53.324 53.624 1
147 Gem compost_fertilizer_5 2.664 2.652 1
150 Gem legum_maincrop_5 0.261 0.259 1
152 Gem C.E.C 8.234 8.204 1
161 Gem B 0.110 0.110 1
163 Gem Fe 130.034 130.058 1
165 Gem P 5.694 5.604 1
208 Kakamega (South) Fe 131.662 131.618 1
217 Kakamega (South) hilltop 0.044 0.045 1
264 Kakamega B (North) crop.maize 0.976 0.976 1
285 Kisii legum_maincrop_5 0.319 0.300 1
301 Kisii PSI 210.692 211.678 1
315 Kisii own.sheep 0.021 0.020 1
326 Lugari chemical_fertilizer_5 3.250 3.186 1
327 Lugari compost_fertilizer_5 0.455 0.314 1
328 Lugari lime_fertilizer_5 -1.102 -1.151 1
329 Lugari fertilizer_5 -0.852 -1.058 1
330 Lugari legum_maincrop_5 -1.068 -1.081 1
333 Lugari Cu 2.296 2.306 1
336 Lugari Hp 0.307 0.309 1
338 Lugari Mg 159.265 160.031 1
354 Lugari crop.maize 0.989 0.988 1
357 Lugari own.goats 0.045 0.047 1
362 Matete age 43.870 44.128 1
363 Matete cows 2.217 2.191 1
396 Matete valley 0.500 0.511 1
398 Matete hillside 0.413 0.426 1
401 Matete own.cows 0.761 0.766 1
412 Migori sheep 0.593 0.588 1
436 Migori PSI 110.488 110.625 1
441 Migori valley 0.477 0.471 1
457 Rachuonyo sheep 1.095 1.067 1
460 Rachuonyo dap.acre 73.343 73.779 1
485 Rachuonyo Total.N 0.173 0.173 1
496 Rongo female 0.667 0.667 1
509 Rongo fertilizer_5 -1.064 -1.103 1
516 Rongo Hp 0.348 0.347 1
522 Rongo Ca 1177.134 1182.208 1
523 Rongo Fe 169.246 168.722 1
543 Suneka cows 1.471 1.429 1
555 Suneka legum_maincrop_5 0.088 0.086 1
564 Suneka Mn 188.015 188.691 1
568 Suneka Fe 146.640 146.812 1
576 Suneka valley 0.765 0.771 1
579 Suneka crop.maize 0.941 0.943 1
587 Webuye age 45.024 45.167 1
600 Webuye legum_maincrop_5 0.024 0.024 1
613 Webuye Fe 116.392 116.915 1
621 Webuye valley 0.952 0.952 1
623 Webuye hillside 0.048 0.048 1
628 Webuye own.chickens 0.905 0.905 1
13 Busia lime_fertilizer_5 0.000 0.000 NA
276 Kisii pigs 0.000 0.000 NA
283 Kisii lime_fertilizer_5 0.000 0.000 NA
307 Kisii hilltop 0.000 0.000 NA
314 Kisii own.pigs 0.000 0.000 NA
411 Migori pigs 0.000 0.000 NA
449 Migori own.pigs 0.000 0.000 NA
456 Rachuonyo pigs 0.000 0.000 NA
494 Rachuonyo own.pigs 0.000 0.000 NA
501 Rongo pigs 0.000 0.000 NA
508 Rongo lime_fertilizer_5 0.000 0.000 NA
532 Rongo hilltop 0.000 0.000 NA
539 Rongo own.pigs 0.000 0.000 NA
546 Suneka pigs 0.000 0.000 NA
584 Suneka own.pigs 0.000 0.000 NA
598 Webuye lime_fertilizer_5 0.000 0.000 NA
622 Webuye hilltop 0.000 0.000 NA

Demographic variables:

Agricultural practice variables:

Soil Variables:

write.csv(dist.output, file=paste(od, "ke district balance.csv", sep="/"), row.names=T)

Agronomic Behaviors

suppressMessages(library(stargazer))

d[,grep("_5", names(d))] <- sapply(d[,grep("_5", names(d))], function(x){
  ifelse(x==-99, NA, x)
})

apply(d[,grep("_5", names(d))],2, table)

$chemical_fertilizer_5

0 1 2 3 4 5 296 149 179 207 93 1106

$compost_fertilizer_5

0 1 2 3 4 5 988 176 174 116 55 523

$lime_fertilizer_5

0 1 2 3 5 2007 7 12 1 5

$fertilizer_5

0 1 2 3 4 5 1841 90 39 23 7 29

$legum_maincrop_5

0 1 2 3 4 5 1771 142 56 24 14 25

$legum_intercrop_5

0 1 2 3 4 5 507 174 244 189 150 768

Plot the variables as they are. Should we consider a log transform as we used for the Rwanda variables?

for(i in names(d)[grep("_5", names(d))]) {
  print(
    ggplot(d, aes(x=d[,i])) + geom_histogram(binwidth=1) +
      labs(x = paste("Past five seasons of ", i, sep = ""))
  )
}
## Warning: Removed 5 rows containing non-finite values (stat_bin).

## Warning: Removed 3 rows containing non-finite values (stat_bin).

## Warning: Removed 3 rows containing non-finite values (stat_bin).

## Warning: Removed 6 rows containing non-finite values (stat_bin).

## Warning: Removed 3 rows containing non-finite values (stat_bin).

## Warning: Removed 3 rows containing non-finite values (stat_bin).

d$logFert <- log(d$chemical_fertilizer_5+1)
d$logCompost <- log(d$compost_fertilizer_5+1)
d$logLime <- log(d$lime_fertilizer_5+1)
d$logLegmain <- log(d$legum_maincrop_5+1)
#d$logLegint <- log(d$legum_intercrop_5+1)

logVars <- paste(names(d[grep("log", names(d))]), collapse=" + ")
cor(d[,grep("log", names(d))], use = "complete.obs")
##               logFert logCompost     logLime  logLegmain
## logFert    1.00000000 0.12629624  0.03840900  0.08136505
## logCompost 0.12629624 1.00000000  0.03135643  0.05436087
## logLime    0.03840900 0.03135643  1.00000000 -0.02479574
## logLegmain 0.08136505 0.05436087 -0.02479574  1.00000000

Baseline balance by OAF tenure

Look at farmers by duration of tenure farming with 1AF We want to understand, at least with an initial naive baseline sense, what is the cumulative effect of Tubura practices on soil health outcomes?

We will look only at current 1AF farmers and compare first year farmers to farmers with more experience with Tubura.

oafOnly <- d[which(d$oaf==1 & d$seasons_oaf>=1),]
for(i in 1:length(soilVars)){
print(
  ggplot(oafOnly, aes(x=as.factor(seasons_oaf), y=oafOnly[,soilVars[i]])) + 
    geom_boxplot() + 
    labs(x="OAF Tenure", y=soilVars[i], title = paste("KE baseline soil by tenure - ", soilVars[i], sep = ""))
  )  
}

Tenure summaries

tenureSum <- aggregate(oafOnly[,out.list], by=list(oafOnly$seasons_oaf), function(x){
  round(mean(x, na.rm=T),2)
})
tenureSum <- as.data.frame(t(tenureSum))
colnames(tenureSum) <- c(paste(seq(1,8,1), " seas.", sep=""))
print(kable(tenureSum))
1 seas. 2 seas. 3 seas. 4 seas. 5 seas. 6 seas. 7 seas. 8 seas.
Group.1 1.00 2.00 3.00 4.00 5.00 6.00 7.00 8.00
female 0.71 0.69 0.77 0.67 0.68 0.79 0.60 0.67
age 43.13 45.55 47.37 51.85 51.76 51.34 49.20 58.67
cows 1.82 1.96 2.17 2.87 2.70 2.76 1.60 4.83
goats 0.58 0.45 0.42 0.86 0.71 0.34 0.20 0.33
chickens 11.06 10.73 10.26 12.06 15.95 11.41 8.00 21.00
pigs 0.17 0.21 0.14 0.23 0.21 0.69 0.20 0.83
sheep 0.46 0.52 0.43 0.44 0.54 0.17 0.40 0.17
field_size 0.50 0.54 0.55 0.65 0.53 0.65 0.45 0.62
yield 48.79 62.83 63.74 65.09 63.08 59.62 54.80 51.50
dap.acre 56.65 50.06 50.75 45.26 44.01 42.35 62.93 53.78
can.acre 49.77 42.67 43.78 40.03 41.40 35.16 57.00 40.75
npk.acre 21.01 68.00 20.00 NaN NaN NaN NaN NaN
urea.acre 42.07 52.00 26.67 24.00 NaN 4.00 NaN NaN
chemical_fertilizer_5 3.04 3.48 3.82 4.35 4.37 4.41 4.20 5.00
compost_fertilizer_5 1.61 1.75 1.73 2.12 2.86 2.62 1.20 1.83
lime_fertilizer_5 0.02 0.06 0.02 0.09 0.08 0.00 0.00 0.00
fertilizer_5 0.22 0.25 0.21 0.28 0.16 0.24 0.00 0.00
legum_maincrop_5 0.20 0.17 0.28 0.55 0.57 0.59 0.00 0.00
legum_intercrop_5 2.54 2.51 2.53 2.69 2.89 3.28 2.40 2.17
C.E.C 10.30 10.31 10.58 10.14 9.66 9.54 6.61 9.95
Cu 3.21 3.30 3.24 3.23 3.65 3.54 1.90 3.23
EC 47.98 46.63 47.44 46.19 44.33 41.66 43.20 43.46
Exch.Al 0.13 0.14 0.13 0.11 0.18 0.09 0.14 0.10
Hp 0.37 0.35 0.35 0.34 0.47 0.33 0.37 0.34
K 142.63 141.07 143.31 136.61 131.29 121.91 90.35 130.13
Mg 179.27 182.43 182.20 174.70 168.92 168.02 92.00 159.78
Mn 153.35 161.79 157.42 156.02 169.54 179.08 103.36 152.35
pH 5.82 5.87 5.86 5.87 5.70 5.95 5.69 5.82
B 0.15 0.15 0.16 0.15 0.14 0.12 0.10 0.14
Ca 1009.45 1043.20 1054.48 1045.94 905.38 1020.28 618.14 913.43
Fe 139.24 137.46 132.84 135.61 132.93 144.18 138.20 132.92
Na 31.79 31.54 31.10 30.99 30.57 31.21 29.75 29.81
P 14.49 12.50 10.15 10.50 6.40 6.52 24.05 9.50
PSI 122.41 125.61 130.29 126.75 148.03 123.62 85.38 129.65
S 12.80 13.04 12.88 13.39 14.68 15.45 10.93 13.20
Zn 4.79 4.80 4.67 4.68 4.62 4.91 3.61 4.35
Total.C 2.18 2.15 2.21 2.15 2.16 2.07 1.66 2.10
Total.N 0.16 0.16 0.16 0.16 0.17 0.15 0.12 0.16
valley 0.72 0.72 0.74 0.72 0.68 0.72 0.80 0.67
hilltop 0.03 0.02 0.02 0.03 0.03 0.03 0.00 0.00
hillside 0.25 0.26 0.24 0.24 0.29 0.24 0.20 0.33
crop.maize 0.87 0.93 0.97 0.95 0.98 0.97 1.00 1.00
alt 1265.77 1303.87 1158.44 1222.10 1353.65 1324.20 1499.11 1464.94
own.cows 0.68 0.75 0.81 0.88 0.86 0.90 0.80 1.00
own.goats 0.22 0.22 0.20 0.33 0.32 0.28 0.20 0.33
own.chickens 0.89 0.93 0.90 0.92 0.92 0.86 1.00 1.00
own.pigs 0.05 0.08 0.07 0.09 0.10 0.28 0.20 0.33
own.sheep 0.17 0.17 0.20 0.16 0.14 0.10 0.20 0.17

OAF tenure balance table

oafOnly$tenured <- ifelse(oafOnly$seasons_oaf>=3,0,1)

toMatch <- c("npk", "urea", "g_intercrop")
tenureList <- out.list[!out.list %in% unique(grep(paste(toMatch, collapse="|"), out.list, value=TRUE))]


tenure <- do.call(rbind, lapply(tenureList, function(x) {
  
  out <- t.test(oafOnly[,x] ~ oafOnly[,"tenured"], data=oafOnly)
  tab <- data.frame(out[[5]][[2]],out[[5]][[1]], out[3])
  tab[,1:2] <- round(tab[,1:2],3)
  names(tab) <- c(names(out[[5]]), "pvalue")
  return(tab)
}))

# use p.adjust with bonferroni correction
tenure$pvalue <- p.adjust(tenure$pvalue, method="fdr")
rownames(tenure) <- tenureList
tenure <- tenure[order(tenure$pvalue),]
tenure$pvalue <- ifelse(tenure[, 3] < 0.001, "< 0.001", round(tenure[, 3], 3)) 
colnames(tenure) <- c("Tenured (3+)","New (< 3)", "p-value")    
print(kable(tenure))
Tenured (3+) New (< 3) p-value
chemical_fertilizer_5 3.217 4.141 < 0.001
age 44.094 49.980 < 0.001
yield 54.379 63.318 < 0.001
own.cows 0.711 0.848 < 0.001
cows 1.877 2.549 < 0.001
crop.maize 0.898 0.966 < 0.001
P 13.700 9.457 < 0.001
legum_maincrop_5 0.189 0.425 0.002
dap.acre 53.582 47.490 0.005
field_size 0.517 0.585 0.007
compost_fertilizer_5 1.665 2.112 0.007
S 12.892 13.540 0.009
own.pigs 0.059 0.103 0.061
can.acre 45.950 41.428 0.066
Na 31.688 30.939 0.075
PSI 123.685 131.293 0.106
EC 47.440 45.912 0.133
Fe 138.535 134.661 0.182
own.goats 0.216 0.264 0.224
K 142.012 136.476 0.399
Zn 4.793 4.661 0.399
Total.N 0.157 0.160 0.399
alt 1280.940 1235.703 0.401
legum_intercrop_5 2.531 2.695 0.423
chickens 10.931 12.046 0.519
Mn 156.708 160.161 0.586
Mg 180.529 174.821 0.587
female 0.698 0.724 0.618
Cu 3.242 3.315 0.668
pigs 0.184 0.236 0.702
sheep 0.485 0.425 0.702
Exch.Al 0.135 0.127 0.702
goats 0.532 0.586 0.702
lime_fertilizer_5 0.032 0.049 0.702
C.E.C 10.299 10.132 0.727
B 0.148 0.145 0.767
Hp 0.360 0.368 0.798
fertilizer_5 0.231 0.216 0.905
Ca 1022.886 1013.531 0.905
Total.C 2.171 2.162 0.905
valley 0.718 0.724 0.905
hillside 0.258 0.250 0.905
pH 5.843 5.840 0.94
hilltop 0.025 0.026 0.94
own.chickens 0.906 0.908 0.94
own.sheep 0.174 0.172 0.94

Demographic variables:

Agricultural practice variables:

Soil Variables:

Keep only soil variables with reliable r2 values for the regressions. Confirm this list with Emmanuel and confirm when we’ll get the C and N estimates

#soilVars <- c("C.E.C", "Cu", "EC", "Exch.Al", "Hp", "K", "Mg", "Mn",
#             "pH", "B", "Ca", "Fe", "Na", "P", "PSI", "S", "Zn")

soilVars <- c("Mg", "pH", "Ca", "Exch.Al", "C.E.C", "Total.C", "Total.N")
list1 <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~",  logVars, "+ as.factor(SiteName)", sep="")), data=d)
  return(mod)
})

Table 17 - years of input use

plm.log <- function(x, range){
  
  beta = round(summary(x)$coefficients[range,1],3)
  beta.pval = summary(x)$coefficients[range,4]
  beta.conv = ifelse(beta.pval < 0.01, "***", ifelse(
    beta.pval < 0.05, "**", ifelse(
      beta.pval < 0.1, "*", "")))
  #beta.pval = round(beta.pval, 3)
  outcome = paste(beta, beta.conv, sep = "")
  outcome = c(outcome, unique(round(summary(x)$coefficients[1,1],3)))
  res = data.frame(outcome, stringsAsFactors = F)
  
  return(res)  
}

ke.table17 <- do.call(cbind, lapply(list1, function(x){
  plm.log(x, 2:5)
}))
colnames(ke.table17) <- soilVars
rownames(ke.table17) <- c(unlist(strsplit(gsub(" \\+ ", " ", logVars), " ")), "constant")
ke.table17 <- ke.table17[,c("pH","Total.C", "Total.N", "Ca", "Mg", "Exch.Al", "C.E.C")]

write.csv(ke.table17, file=paste(od, "ketable17.csv", sep="/"), row.names=T)
# stargazer for table 17
stargazer(list1, type="html", 
          title = "2016A Kenya Soil Health Baseline - Agronomic Practice Models (log)",
          covariate.labels = c("Seasons of Fertilizer", "Seasons of Compost", 
                               "Seasons of Lime", "Seasons of Legume (main)",
                               "Seasons of Legume (inter)"),
          dep.var.caption = "",
          dep.var.labels = "",
          column.labels = c(gsub("m3.","", soilVars)),
          notes = "Includes FE for site",
          omit=c("SiteName"), out=paste(od, "ke_baseline_agprac.htm",sep="/"))

Naive tenure models

stargazer(list2, type="html", 
          title = "2016A Kenya Soil Health Baseline - Naive Tenure Models",
          covariate.labels = c("OAF Tenure"),
          dep.var.caption = "",
          dep.var.labels = "",
          column.labels = c(gsub("m3.","", soilVars)),
          notes = "Includes FE for site",
          omit=c("SiteName"), out=paste(od, "ke_baseline_tenure.htm",sep="/"))
list3 <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~",  logVars, "+ seasons_oaf + as.factor(SiteName)", sep="")), data=d)
  return(mod)
})
stargazer(list3, type="html", 
          title = "2016A Kenya Soil Health Baseline - Ag Practice and Tenure",
          covariate.labels = c("Seasons of Fertilizer", "Seasons of Compost", 
                               "Seasons of Lime", "Seasons of Legume (main)",
                               "Seasons of Legume (inter)", "OAF Tenure"),
          dep.var.caption = "",
          dep.var.labels = "",
          column.labels = c(gsub("m3.","", soilVars)),
          notes = "Includes FE for site",
          omit=c("SiteName"), out=paste(od, "ke_baseline_ag_tenure.htm", sep="/"))

Table 18 - client product use and soil outcomes

First, convert all quantites to kg/acre and kg/ha. Then check for collinearity, set up and execute models

cor(d[,acreInputs[c(1:2,5)] ], use="complete.obs")
##                  dap.acre    can.acre compost.acre
## dap.acre      1.000000000  0.73173139 -0.004010445
## can.acre      0.731731394  1.00000000 -0.021841883
## compost.acre -0.004010445 -0.02184188  1.000000000

As with the Rwanda baseline soil health study, agricultural input quantity use is highly correlated.

plm.t16 <- function(x, range){
  
  beta = round(summary(x)$coefficients[range,1],3)
  beta.pval = round(summary(x)$coefficients[range,4],3)
  beta.conv = ifelse(beta.pval < 0.01, "***", ifelse(
    beta.pval < 0.05, "**", ifelse(
      beta.pval < 0.1, "*", "")))
  #beta.pval = round(beta.pval, 3)
  outcome = paste(beta, " (", beta.pval, ")", sep = "")
  outcome = c(outcome, unique(round(summary(x)$coefficients[1,1],3)))
  res = data.frame(outcome, stringsAsFactors = F)
  
  return(res)  
}
previousSeasonfert <- paste("dap.acre", "as.factor(SiteName)", sep=" + ")

list.t18 <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~", previousSeasonfert, sep="")), data=d)
  return(mod)
})

table18 <- do.call(cbind, lapply(list.t18, function(x){
  plm.t16(x, 2)
}))
colnames(table18) <- soilVars
rownames(table18) <- c("DAP (kg/acre)", "constant")

table18 <- table18[,c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")]
write.csv(table18, file=paste(od, "table18_dap.csv", sep = "/"),
          row.names = T)
previousSeasoncompost <- paste("compost.acre", "as.factor(SiteName)", sep=" + ")
list.t18b <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~", previousSeasoncompost, sep="")), data=d)
  return(mod)
})

table18b <- do.call(cbind, lapply(list.t18b, function(x){
  plm.t16(x, 2)
}))
colnames(table18b) <- soilVars
rownames(table18b) <- c("Compost (kg/acre)", "constant")

table18b <- table18b[,c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")]
write.csv(table18b, file=paste(od, "table18_compost.csv", sep = "/"),
          row.names = T)
plm.t18 <- function(x, range){
  
  beta = round(summary(x)$coefficients[range,1],4)
  beta.pval = round(summary(x)$coefficients[range,4],3)
  beta.conv = ifelse(beta.pval < 0.01, "***", ifelse(
    beta.pval < 0.05, "**", ifelse(
      beta.pval < 0.1, "*", "")))
  #beta.pval = round(beta.pval, 3)
  outcome = paste(beta, " (", beta.pval, ")", sep = "")
  outcome = c(outcome, unique(round(summary(x)$coefficients[1,1],3)))
  res = data.frame(outcome, stringsAsFactors = F)
  
  return(res)  
}

previousSeasonCan <- paste("can.acre", "as.factor(SiteName)", sep=" + ")
list.t18c <- lapply(soilVars, function(x){
  mod <- lm(as.formula(paste("d[,x] ~", previousSeasonCan, sep="")), data=d)
  return(mod)
})

table18c <- do.call(cbind, lapply(list.t18c, function(x){
  plm.t18(x, 2)
}))
colnames(table18c) <- soilVars
rownames(table18c) <- c("CAN (kg/acre)", "constant")

table18c <- table18c[,c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")]
write.csv(table18c, file=paste(od, "table18_can.csv", sep = "/"),
          row.names = T)

District and site level summaries of soil and managment practices

dist.sum <- aggregate(d[,out.list], by=list(d$DistrictName), function(x){
  return(c(
    paste(
      round(mean(x, na.rm=T),3), " (", round(median(x, na.rm=T),3), ")", 
      " (", round(sd(x, na.rm=T),2), ")", sep = ""
    )
    ))
})
write.csv(dist.sum, file=paste(od, "district covariate summary.csv", sep = "/"))
site.sum <- aggregate(d[,out.list], by=list(d$SiteName), function(x){
  return(c(
    paste(
      round(mean(x, na.rm=T),3), " (", round(median(x, na.rm=T),3), ")", 
      " (", round(sd(x, na.rm=T),2), ")", sep = ""
    )
    ))
})
write.csv(site.sum, file=paste(od, "site covariate summary.csv", sep = "/"))

Truncated site level summary (site count > 10 = 25 sites)

largerSite <- table(d$SiteName)>10
largerSite <- rownames(as.matrix(largerSite[largerSite==T]))

site.sum.tr <- aggregate(d[d$SiteName %in% largerSite,out.list], by=list(d[d$SiteName %in% largerSite, "SiteName"]), function(x){
  return(c(
    paste(
      round(mean(x, na.rm=T),3), " (", round(median(x, na.rm=T),3), ")", 
      " (", round(sd(x, na.rm=T),2), ")", sep = ""
    )
    ))
})
write.csv(site.sum.tr, file=paste(od, "site truncated covariate summary.csv", sep = "/"))

Female farmers farming poorer soils?

toRemove <- "female"
genderBalance <- out.list[!out.list %in% toRemove]

equal <- do.call(rbind, lapply(genderBalance, function(x){
    
    out <- t.test(d[,x] ~ d[,"female"], data=d)
    tab <- data.frame(out[[5]][[2]],out[[5]][[1]], out[3])
    tab[,1:2] <- round(tab[,1:2],3)
    names(tab) <- c(names(out[[5]]), "pvalue")
    #tab[,3] <- p.adjust(tab[,3], method="holm")
    #tab[,3] <- ifelse(tab[,3] < 0.001, "< 0.001", round(tab[,3],3))
    #print(tab)
    return(tab)
  
}))

rownames(equal) <- NULL

# order variables 
equal$pvalue <- p.adjust(equal$pvalue, method="fdr")
rownames(equal) <- genderBalance
equal <- equal[order(equal$pvalue),]

equal$pvalue <- ifelse(equal$pvalue < 0.001, "< 0.001", round(equal$pvalue,3))
colnames(equal) <- c("Male Farmers","Female Farmers", "p-value")    
write.csv(equal, file=paste(od, "female farmer status.csv", sep = "/"), row.names=T)

Propensity Score Matching

We need to do a more rigorous job of accounting for differences between Tubura farmers and identified control farmers. Execute propensity score matching (PSM) to identify control farmers that overlap with Tubura farmers with regard to their likelihood of being a Tubura farmer.

psmVars <- paste(out.list <- c("female", "age", "cows", "goats", "chickens","pigs", "sheep", "SiteName"), collapse=" + ")

psmCheck <- unlist(strsplit(gsub("\\+", "", as.vector(psmVars))," ", fixed=TRUE))
psmCheck <- psmCheck[-which(psmCheck=="")]
naOut <- cbind(unlist(lapply(psmCheck, function(x){sum(is.na(d[,x]))})), psmCheck)

naOut
##          psmCheck  
## [1,] "0" "female"  
## [2,] "0" "age"     
## [3,] "0" "cows"    
## [4,] "0" "goats"   
## [5,] "0" "chickens"
## [6,] "2" "pigs"    
## [7,] "0" "sheep"   
## [8,] "0" "SiteName"

We have missing values that are preventing the prediction function that preceeds PSM from running properly. Let’s for now just remove yield as a conributing variable.

h <- d[complete.cases(d[,psmCheck]),]
psmVars <- paste(psmCheck, collapse=" + ")

reg <- glm(as.formula(paste("oaf ~", psmVars, sep="")),  family= binomial(link="logit"), data=h)

# summarize predicted probabilities
pr <- data.frame(pr_score = predict(reg, type='response'), treat = h$oaf)

# graph
psmGraph <- ggplot() + geom_histogram(data=subset(pr, pr$treat==1), aes(x = pr_score, y=..count.., fill=as.factor(treat)), bins=80, position = "identity") + geom_histogram(data=subset(pr, pr$treat==0), aes(x=pr_score, y=-..count.., fill=as.factor(treat)), bins=80, position = "identity") +
    scale_y_continuous(limits=c(-150,150)) + 
  labs(title ="PSM score overlap", x = "PSM score", y="Farmer count",
       fill="1AF/Control")

print(psmGraph)

pdf(file=paste(od, "ke_baseline_psm_overlap.pdf", sep="/"), height=8.5, width=11)
print(psmGraph)
dev.off()
## quartz_off_screen 
##                 2
d$age2 <- d$age^2

coreVars = c("female", "age", "SiteName", "cows", "goats", "chickens", "pigs", "sheep")

psmList <- list(
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Ca"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Mg"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="pH"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Total.C"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Total.N"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="chemical_fertilizer_5"),
    list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="Exch.Al"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="compost_fertilizer_5"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="legum_maincrop_5"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="legum_intercrop_5"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="logFert"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="dap.acre"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="can.acre"),
  list(tr = "oaf",
       psmVars = paste(coreVars,
                   collapse=" + "),
       y="compost.acre")
)

#lime_fertilizer_5 >> so few farmers use it that we get a bad match.


# PSM
set.seed(20161102)
m <- lapply(psmList, function(listInput){

  naCheck <- unlist(strsplit(gsub("\\+", "", as.vector(listInput$psmVars))," ", fixed=TRUE))
  naCheck <- naCheck[-which(naCheck=="")]
  
  
  # keep complete cases of outcome variable
  k <- d[complete.cases(d[,naCheck]),]
  k <- k[complete.cases(k[,listInput$y]),]
  
  # run glm regression:
  reg <- glm(as.formula(paste(listInput$tr, "~", listInput$psmVars, sep="")),  family= binomial(link="logit"), data=k)
  
  suppressWarnings(
  mod <- Match(Y = k[,listInput$y], Tr = k[,listInput$tr], X = reg$fitted.values, ties=FALSE, replace=FALSE, caliper=0.25, estimand = "ATE")    
  )
  matchRes <- MatchBalance(k[,listInput$tr] ~ k[,listInput$y], match.out=mod, nboots=500, data=k, print.level = 0)
  #print(listInput$y)
  return(list(mod, matchRes))
  
})

The models can now vary by outcome. Let’s see if we can improve our results.

matchRes <- do.call(rbind, lapply(1:length(m), function(model){
  val <- as.data.frame(cbind(
    standard.diff=m[[model]][[2]]$AfterMatching[[1]]$sdiff, 
    var.ratio = m[[model]][[2]]$AfterMatching[[1]]$var.ratio,
    sdiff.adj = m[[model]][[2]]$AfterMatching[[1]]$sdiff/100))
  return(val)
}))

namesInput <- NULL
for(i in 1:length(psmList)){
  namesInput[i] <- psmList[[i]]$y
}
rownames(matchRes) <- namesInput
print(kable(matchRes))
standard.diff var.ratio sdiff.adj
Ca -14.3159103 0.6886026 -0.1431591
Mg -4.4813456 1.1222964 -0.0448135
pH -13.5395683 0.8461136 -0.1353957
Total.C 3.4008495 1.0176179 0.0340085
Total.N 1.2272466 1.0020597 0.0122725
chemical_fertilizer_5 5.9982115 0.9357194 0.0599821
Exch.Al 5.1213209 1.3879332 0.0512132
compost_fertilizer_5 -3.8693681 0.9312421 -0.0386937
legum_maincrop_5 1.9965749 1.0861579 0.0199657
legum_intercrop_5 -22.3961814 0.9569097 -0.2239618
logFert 6.5937830 0.9145441 0.0659378
dap.acre -12.0466746 0.6728032 -0.1204667
can.acre -23.9287207 0.5210046 -0.2392872
compost.acre 0.4900824 0.9413469 0.0049008
write.csv(matchRes, file=paste(od, "ke psm performance.csv", sep = "/"), row.names=T)

So few farmers use lime fertilizer that we don’t get a good match. I could make it binary but it’s so overwhelmingly the case that none of the farmers use lime that I’m just going to drop it.

We also don’t get super great matches on DAP and CAN. I’m going to push forward with this but I’m going to push forward with the results. We should flag this.

coefTable <- do.call(rbind, lapply(1:length(m), function(model){
  beta = round(m[[model]][[1]]$est.noadj,3)
  mean.Tr = round(m[[model]][[2]]$AfterMatching[[1]][[3]], 2)
  mean.Co = round(m[[model]][[2]]$AfterMatching[[1]][[4]], 2)
  pval = m[[model]][[2]]$AfterMatching[[1]][[10]][[3]] # p.value
  #pval = (1 - pnorm(abs(m[[model]][[1]]$est/m[[model]][[1]]$se.standard))) * 2
  pval = ifelse(pval < 0.001, "0.001", round(pval, 3))
  
  res = data.frame(beta, mean.Tr, mean.Co, pval, stringsAsFactors = F)
  return(res)
}))
row.names(coefTable) <- namesInput
coefTable$pval.adj <- round(p.adjust(coefTable$pval, method="fdr"),3)
print(kable(coefTable))
beta mean.Tr mean.Co pval pval.adj
Ca -80.336 995.88 1076.21 0.001 0.004
Mg -4.169 174.81 178.98 0.197 0.306
pH -0.062 5.83 5.89 0.001 0.004
Total.C 0.019 2.16 2.14 0.336 0.428
Total.N 0.001 0.16 0.16 0.732 0.788
chemical_fertilizer_5 0.113 3.58 3.47 0.098 0.196
Exch.Al 0.010 0.13 0.12 0.125 0.219
compost_fertilizer_5 -0.082 1.89 1.97 0.287 0.402
legum_maincrop_5 0.016 0.26 0.24 0.558 0.651
legum_intercrop_5 -0.449 2.70 3.15 0.001 0.004
logFert 0.041 1.38 1.34 0.072 0.168
dap.acre -3.345 51.64 54.98 0.012 0.034
can.acre -5.537 44.40 49.93 0.001 0.004
compost.acre 9.499 266.24 256.74 0.894 0.894
write.csv(coefTable, file=paste(od, "ke psm coefficients.csv", sep = "/"),
          row.names = T)

# sort by the order Eric wants
t7order <- c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al")
table7vars <- paste(t7order, collapse = "|")
table7 <- coefTable[grep(table7vars, rownames(coefTable)), ]
table7 <- table7[order(match(rownames(table7), t7order)),]

write.csv(table7, file=paste(od, "table7.csv", sep = "/"), row.names = T)

# 11/17 added lime
t8order <- c("chemical_fertilizer_5", "compost_fertilizer_5", "legum_maincrop_5",
             "legum_intercrop_5")
table8vars <- paste(t8order, collapse = "|")
table8 <- coefTable[grep(table8vars, rownames(coefTable)), ]
table8 <- table8[order(match(rownames(table8), t8order)),]

write.csv(table8, file=paste(od, "table8.csv", sep = "/"), row.names = T)

#table 3
t9order <- c("dap.acre", "can.acre", "compost.acre")
table9vars <- paste(t9order, collapse = "|")
table9 <- coefTable[grep(table9vars, rownames(coefTable)), ]
table9 <- table9[order(match(rownames(table9), t9order)),]
write.csv(table9, file=paste(od, "table9.csv", sep = "/"))
thresh <- d %>% group_by(oaf) %>% dplyr::summarize(
  count = n(),
  ph = sum(pH<5.8),
  carbon = sum(Total.C < 2),
  nitrogen = sum(Total.N < 0.1),
  calcium = sum(Ca < 720),
  magnesium = sum(Mg < 100)
  #aluminum = sum(ExAl)
) %>% mutate(
  under.ph = paste(paste(round(ph/count,4)*100, "%", sep=""), " (", ph, ")", sep=""),
  under.carbon = paste(paste(round(carbon/count,4)*100,"%", sep=""), " (", carbon, ")", sep=""),
  under.nitrogen = paste(paste(round(nitrogen/count,4)*100, "%", sep=""), " (", nitrogen, ")", sep=""),
  under.calcium = paste(paste(round(calcium/count,4)*100, "%", sep=""), " (", calcium, ")", sep=""),
  under.mag = paste(paste(round(magnesium/count,4)*100,"%", sep=""), " (", magnesium, ")", sep="")
) %>% as.data.frame() 

thresh <- thresh[, c("oaf", names(thresh)[grep("under", names(thresh))])]
thresh <- t(thresh)
colnames(thresh) = thresh[1, ] # the first row will be the header
colnames(thresh) = c("non-client", "client")
thresh = thresh[-1, ]

write.csv(thresh, file=paste(od, "table1_rw_thresholds.csv", sep = "/"), row.names = T)

OAF tenure model - table 14 with PSM

tenureTab.add <- lapply(1:length(m), function(model){
  
  dm <- as.data.frame(rbind(d[m[[model]][[1]]$index.treated,], 
              d[m[[model]][[1]]$index.control,]))
  dm$client_tenure <- dm$oaf*dm$seasons_oaf
  mod <- lm(as.formula(paste("dm[,psmList[[model]]$y] ~", "seasons_oaf + as.factor(SiteName)", sep ="")), data=dm)
  return(mod)
})

modNames <- unlist(lapply(psmList, function(x){
  return(x$y)
}))
plm.tenure <- function(x){
  
  intercept = round(x$coefficients[[1]],3)
  beta = round(x$coefficients[[2]],3)
  int.pval = summary(x)$coefficients[1,4]
  int.pval = ifelse(int.pval < 0.001, "< 0.001", round(as.numeric(int.pval),3))
  beta.pval = summary(x)$coefficients[2,4]
  beta.pval = ifelse(beta.pval < 0.001, "< 0.001", round(as.numeric(beta.pval),3))
  res = data.frame(intercept, int.pval, beta, beta.pval, stringsAsFactors = F)
  
  return(res)  
}

tenure.reg <- do.call(rbind, lapply(tenureTab.add, function(x){
  plm.tenure(x)
}))

rownames(tenure.reg) <- modNames
t14order <- c("pH", "Total.C", "Total.N", "Ca", "Mg", "Exch.Al", "chemical_fertilizer_5",
             "logFert", "compost_fertilizer_5", "legum_maincrop_5", "n_season_fallow")
t14vars <- paste(t14order, collapse = "|")
tenure.reg <- tenure.reg[grep(t14vars,rownames(tenure.reg)),]
tenure.reg <- tenure.reg[order(match(row.names(tenure.reg), t14order)),]

write.csv(tenure.reg, file=paste(od, "table14.csv", sep = "/"), row.names=T)

Mapping

Map of baseline observations

Produce a simple map of where our observations are

if (!(exists("kenya"))){
  # Only need to geocode once per session library(dismo)
  kenya <- try(geocode("Kenya"))
  # If the internet fails, use a local value 
  if (class(kenya) == "try-error") {
    kenya <- ""
  } 
}

See here for more on using markerClusterOptions in leaflet.

In the map below, the larger green circles are One Acre Fund farmers and the smaller blue circles are control farmers.

e <- d[!is.na(d$lon),]
ss <- SpatialPointsDataFrame(coords = e[, c("lon", "lat")], data=e)

pal <- colorNumeric(c("navy", "green"), domain=unique(ss$oaf))
map <- leaflet() %>% addTiles() %>%
  setView(lng=kenya$longitude, lat=kenya$latitude, zoom=6) %>%
  addCircleMarkers(lng=ss$lon, lat=ss$lat, 
                   radius= ifelse(ss$oaf==1, 10,6),
                   color = pal(ss$oaf),
clusterOptions = markerClusterOptions(disableClusteringAtZoom=12, spiderfyOnMaxZoom=FALSE))

map

Notes: We have a lot of GPS points piled on the Bungoma office. We need to address the GPS collection practices that lead to incorrectly logged GPS.

–end